import random
import numpy as np
from rlcard.agents.best_response_agent import BRAgent
from copy import deepcopy


def exploitability(env, agent, evaluate_num):
    """Estimate how exploitable ``agent`` is in a two-player environment.

    For each of ``evaluate_num`` rounds, a best-response agent (``BRAgent``)
    built against ``agent`` is seated first at index 0 and then at index 1.
    After each seating the environment is reset, and the absolute difference
    between two ``BRAgent.value`` queries on the freshly reset state is
    taken. The two differences are averaged per round, and the per-round
    results are averaged over all rounds.

    Args:
        env: Environment exposing ``player_num``, ``set_agents(agents)`` and
            ``reset()``; ``reset()`` must return ``(state, player_id)``.
        agent: The agent being evaluated.
        evaluate_num: Number of evaluation rounds to average over.

    Returns:
        The mean of the per-round exploitability estimates.

    Raises:
        ValueError: If ``env.player_num`` is not 2.
        ZeroDivisionError: If ``evaluate_num`` is 0.
    """
    # Validate once up front; raising a bare string is itself a TypeError
    # in Python 3, so a real exception class is required here.
    if env.player_num != 2:
        raise ValueError("Not two-players game")

    total = 0.0
    for _ in range(evaluate_num):
        # Fresh best-response agents every round, one per seating.
        br_first = BRAgent(env, agent)
        br_second = BRAgent(env, agent)

        # NOTE(review): the last argument of ``value`` appears to select the
        # seat (0 or 1) whose value is queried -- confirm against BRAgent.

        # Seating 1: best response at index 0, evaluated agent at index 1.
        env.set_agents([br_first, agent])
        state1, player_id1 = env.reset()
        br_val1 = br_first.value(player_id1, state1, 0)
        p_val1 = br_first.value(player_id1, state1, 1)
        val1 = abs(br_val1 - p_val1)

        # Seating 2: seats swapped. Query the state produced by *this*
        # reset; the state from the first seating is stale once the
        # environment has been reset again.
        env.set_agents([agent, br_second])
        state2, player_id2 = env.reset()
        br_val2 = br_second.value(player_id2, state2, 1)
        p_val2 = br_second.value(player_id2, state2, 0)
        val2 = abs(br_val2 - p_val2)

        total += (val1 + val2) / 2

    return total / evaluate_num
